{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cf7e717e",
   "metadata": {},
   "source": [
    "## Notebook 1 - Standardize substrates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb2380b0",
   "metadata": {},
   "source": [
    "This notebook standardizes the substrate SMILES and calculates the expected m/z for the corresponding glucoconjugates."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd13f100",
   "metadata": {},
   "outputs": [],
   "source": [
    "%run ../common.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "172c2319",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "from rdkit.Chem import MolFromSmiles\n",
    "from rdkit.Chem.Descriptors import ExactMolWt\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem.MolStandardize import rdMolStandardize\n",
    "\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "from rdkit.Chem import rdDepictor\n",
    "from rdkit.Chem.Draw import rdMolDraw2D\n",
    "from IPython.display import SVG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d134998a",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Mix</th>\n",
       "      <th>Name</th>\n",
       "      <th>InchiKey</th>\n",
       "      <th>ik_MoNA</th>\n",
       "      <th>SMILES</th>\n",
       "      <th>RT</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>3ALPHA-HYDROXY-3-DEOXYANGOLENSIC ACID METHYL E...</td>\n",
       "      <td>QFRUZVNPYYYLAN-BBJYNIMOSA-N</td>\n",
       "      <td>QFRUZVNPYYYLAN</td>\n",
       "      <td>C[C@@]12CCC3C(=C)[C@]1(CC(=O)O[C@H]2C4=COC=C4)...</td>\n",
       "      <td>10.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3-AMINO-BETA-PINENE</td>\n",
       "      <td>SQSDBXYJKLVZJR-UHFFFAOYSA-N</td>\n",
       "      <td>SQSDBXYJKLVZJR</td>\n",
       "      <td>CC1(C2CC1C(=C)C(C2)N)C.Cl</td>\n",
       "      <td>5.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3BETA-HYDROXY-23,24-BISNORCHOL-5-ENIC ACID</td>\n",
       "      <td>NPBNRBWMDNZEBN-YTEKVJICSA-N</td>\n",
       "      <td>NPBNRBWMDNZEBN</td>\n",
       "      <td>CC([C@H]1CCC2[C@@]1(CCC3C2CC=C4[C@@]3(CC[C@@H]...</td>\n",
       "      <td>13.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3-HYDROXYTYRAMINE</td>\n",
       "      <td>VYFYYTLLBUKUHU-UHFFFAOYSA-N</td>\n",
       "      <td>VYFYYTLLBUKUHU</td>\n",
       "      <td>C1=CC(=C(C=C1CCN)O)O</td>\n",
       "      <td>11.47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>AVOCADYNE ACETATE</td>\n",
       "      <td>JAKAZHIACKJNNB-UHFFFAOYSA-N</td>\n",
       "      <td>JAKAZHIACKJNNB</td>\n",
       "      <td>CC(=O)OCC(CC(CCCCCCCCCCCC#C)O)O</td>\n",
       "      <td>11.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>459</th>\n",
       "      <td>12</td>\n",
       "      <td>SPERMIDINE</td>\n",
       "      <td>ATHGHQPFGPMSJY-UHFFFAOYSA-N</td>\n",
       "      <td>ATHGHQPFGPMSJY</td>\n",
       "      <td>C(CCNCCCN)CN</td>\n",
       "      <td>0.63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>460</th>\n",
       "      <td>12</td>\n",
       "      <td>STEVIOL</td>\n",
       "      <td>QFVOYBUQQBFCRH-VQSWZGCSSA-N</td>\n",
       "      <td>QFVOYBUQQBFCRH</td>\n",
       "      <td>C[C@@]12CCC[C@@]([C@H]1CC[C@]34[C@H]2CC[C@](C3...</td>\n",
       "      <td>11.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>461</th>\n",
       "      <td>12</td>\n",
       "      <td>STIGMASTEROL</td>\n",
       "      <td>HCXVJBMSMIARIN-PHZDYDNGSA-N</td>\n",
       "      <td>HCXVJBMSMIARIN</td>\n",
       "      <td>CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>462</th>\n",
       "      <td>12</td>\n",
       "      <td>TODDALOLACTONE</td>\n",
       "      <td>GLWPLQBQHWYKRK-UHFFFAOYSA-N</td>\n",
       "      <td>GLWPLQBQHWYKRK</td>\n",
       "      <td>CC(C)(C(CC1=C(C=C2C(=C1OC)C=CC(=O)O2)OC)O)O</td>\n",
       "      <td>6.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>463</th>\n",
       "      <td>12</td>\n",
       "      <td>TOMATINE</td>\n",
       "      <td>REJLGAUYTKNVJM-UHFFFAOYSA-N</td>\n",
       "      <td>REJLGAUYTKNVJM</td>\n",
       "      <td>CC1CCC2(C(C3C(O2)CC4C3(CCC5C4CCC6C5(CCC(C6)OC7...</td>\n",
       "      <td>6.39</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>464 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Mix                                               Name  \\\n",
       "0      1  3ALPHA-HYDROXY-3-DEOXYANGOLENSIC ACID METHYL E...   \n",
       "1      1                                3-AMINO-BETA-PINENE   \n",
       "2      1         3BETA-HYDROXY-23,24-BISNORCHOL-5-ENIC ACID   \n",
       "3      1                                  3-HYDROXYTYRAMINE   \n",
       "4      1                                  AVOCADYNE ACETATE   \n",
       "..   ...                                                ...   \n",
       "459   12                                         SPERMIDINE   \n",
       "460   12                                            STEVIOL   \n",
       "461   12                                       STIGMASTEROL   \n",
       "462   12                                     TODDALOLACTONE   \n",
       "463   12                                           TOMATINE   \n",
       "\n",
       "                        InchiKey         ik_MoNA  \\\n",
       "0    QFRUZVNPYYYLAN-BBJYNIMOSA-N  QFRUZVNPYYYLAN   \n",
       "1    SQSDBXYJKLVZJR-UHFFFAOYSA-N  SQSDBXYJKLVZJR   \n",
       "2    NPBNRBWMDNZEBN-YTEKVJICSA-N  NPBNRBWMDNZEBN   \n",
       "3    VYFYYTLLBUKUHU-UHFFFAOYSA-N  VYFYYTLLBUKUHU   \n",
       "4    JAKAZHIACKJNNB-UHFFFAOYSA-N  JAKAZHIACKJNNB   \n",
       "..                           ...             ...   \n",
       "459  ATHGHQPFGPMSJY-UHFFFAOYSA-N  ATHGHQPFGPMSJY   \n",
       "460  QFVOYBUQQBFCRH-VQSWZGCSSA-N  QFVOYBUQQBFCRH   \n",
       "461  HCXVJBMSMIARIN-PHZDYDNGSA-N  HCXVJBMSMIARIN   \n",
       "462  GLWPLQBQHWYKRK-UHFFFAOYSA-N  GLWPLQBQHWYKRK   \n",
       "463  REJLGAUYTKNVJM-UHFFFAOYSA-N  REJLGAUYTKNVJM   \n",
       "\n",
       "                                                SMILES     RT  \n",
       "0    C[C@@]12CCC3C(=C)[C@]1(CC(=O)O[C@H]2C4=COC=C4)...  10.38  \n",
       "1                            CC1(C2CC1C(=C)C(C2)N)C.Cl   5.00  \n",
       "2    CC([C@H]1CCC2[C@@]1(CCC3C2CC=C4[C@@]3(CC[C@@H]...  13.75  \n",
       "3                                 C1=CC(=C(C=C1CCN)O)O  11.47  \n",
       "4                      CC(=O)OCC(CC(CCCCCCCCCCCC#C)O)O  11.12  \n",
       "..                                                 ...    ...  \n",
       "459                                       C(CCNCCCN)CN   0.63  \n",
       "460  C[C@@]12CCC[C@@]([C@H]1CC[C@]34[C@H]2CC[C@](C3...  11.15  \n",
       "461  CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC...    NaN  \n",
       "462        CC(C)(C(CC1=C(C=C2C(=C1OC)C=CC(=O)O2)OC)O)O   6.60  \n",
       "463  CC1CCC2(C(C3C(O2)CC4C3(CCC5C4CCC6C5(CCC(C6)OC7...   6.39  \n",
       "\n",
       "[464 rows x 6 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_substrates = pd.read_csv('../data/screening_data/Substrates.csv')\n",
    "df_substrates['Name'] = df_substrates['Name'].str.upper()\n",
    "df_substrates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6f1d03cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "453"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_substrates.InchiKey.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "90a3fd05",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "453"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_substrates.SMILES.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ce433af",
   "metadata": {},
   "source": [
    "## Standardize SMILES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "6a87e195",
   "metadata": {},
   "outputs": [],
   "source": [
    "def standardize(smiles):\n",
    "    '''Adapted from: https://bitsilla.com/blog/2021/06/standardizing-a-molecule-using-rdkit/'''\n",
    "    # follows the steps in\n",
    "    # https://github.com/greglandrum/RSC_OpenScience_Standardization_202104/blob/main/MolStandardize%20pieces.ipynb\n",
    "    # as described **excellently** (by Greg) in\n",
    "    # https://www.youtube.com/watch?v=eWTApNX8dJQ\n",
    "    mol = Chem.MolFromSmiles(smiles, sanitize=True)\n",
    "     \n",
    "    # removeHs, disconnect metal atoms, normalize the molecule, reionize the molecule\n",
    "    clean_mol = rdMolStandardize.Cleanup(mol) \n",
    "     \n",
    "    # if many fragments, get the \"parent\" (the actual mol we are interested in) \n",
    "    parent_clean_mol = rdMolStandardize.FragmentParent(clean_mol)\n",
    "         \n",
    "    # try to neutralize molecule\n",
    "    uncharger = rdMolStandardize.Uncharger() # annoying, but necessary as no convenience method exists\n",
    "    uncharged_parent_clean_mol = uncharger.uncharge(parent_clean_mol)\n",
    "     \n",
    "    # note that no attempt is made at reionization at this step\n",
    "    # nor at ionization at some pH (rdkit has no pKa caculator)\n",
    "    # the main aim to to represent all molecules from different sources\n",
    "    # in a (single) standard way, for use in ML, catalogue, etc.\n",
    "     \n",
    "    # te = rdMolStandardize.TautomerEnumerator() # idem\n",
    "    # taut_uncharged_parent_clean_mol = te.Canonicalize(uncharged_parent_clean_mol)\n",
    "     \n",
    "    return Chem.MolToSmiles(uncharged_parent_clean_mol, isomericSmiles=False, canonical=True) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "b64ea8e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "csmiles = []\n",
    "\n",
    "for smi in df_substrates['SMILES']:\n",
    "    csmi = standardize(smi)\n",
    "    csmiles.append(csmi)\n",
    "\n",
    "df_substrates['CSMILES'] = csmiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "69bc9394",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Mix</th>\n",
       "      <th>Name</th>\n",
       "      <th>InchiKey</th>\n",
       "      <th>ik_MoNA</th>\n",
       "      <th>SMILES</th>\n",
       "      <th>RT</th>\n",
       "      <th>CSMILES</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>3ALPHA-HYDROXY-3-DEOXYANGOLENSIC ACID METHYL E...</td>\n",
       "      <td>QFRUZVNPYYYLAN-BBJYNIMOSA-N</td>\n",
       "      <td>QFRUZVNPYYYLAN</td>\n",
       "      <td>C[C@@]12CCC3C(=C)[C@]1(CC(=O)O[C@H]2C4=COC=C4)...</td>\n",
       "      <td>10.38</td>\n",
       "      <td>C=C1C2CCC3(C)C(c4ccoc4)OC(=O)CC13OC1CC(O)C(C)(...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3-AMINO-BETA-PINENE</td>\n",
       "      <td>SQSDBXYJKLVZJR-UHFFFAOYSA-N</td>\n",
       "      <td>SQSDBXYJKLVZJR</td>\n",
       "      <td>CC1(C2CC1C(=C)C(C2)N)C.Cl</td>\n",
       "      <td>5.00</td>\n",
       "      <td>C=C1C(N)CC2CC1C2(C)C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3BETA-HYDROXY-23,24-BISNORCHOL-5-ENIC ACID</td>\n",
       "      <td>NPBNRBWMDNZEBN-YTEKVJICSA-N</td>\n",
       "      <td>NPBNRBWMDNZEBN</td>\n",
       "      <td>CC([C@H]1CCC2[C@@]1(CCC3C2CC=C4[C@@]3(CC[C@@H]...</td>\n",
       "      <td>13.75</td>\n",
       "      <td>CC(C(=O)O)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3-HYDROXYTYRAMINE</td>\n",
       "      <td>VYFYYTLLBUKUHU-UHFFFAOYSA-N</td>\n",
       "      <td>VYFYYTLLBUKUHU</td>\n",
       "      <td>C1=CC(=C(C=C1CCN)O)O</td>\n",
       "      <td>11.47</td>\n",
       "      <td>NCCc1ccc(O)c(O)c1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>AVOCADYNE ACETATE</td>\n",
       "      <td>JAKAZHIACKJNNB-UHFFFAOYSA-N</td>\n",
       "      <td>JAKAZHIACKJNNB</td>\n",
       "      <td>CC(=O)OCC(CC(CCCCCCCCCCCC#C)O)O</td>\n",
       "      <td>11.12</td>\n",
       "      <td>C#CCCCCCCCCCCCC(O)CC(O)COC(C)=O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>459</th>\n",
       "      <td>12</td>\n",
       "      <td>SPERMIDINE</td>\n",
       "      <td>ATHGHQPFGPMSJY-UHFFFAOYSA-N</td>\n",
       "      <td>ATHGHQPFGPMSJY</td>\n",
       "      <td>C(CCNCCCN)CN</td>\n",
       "      <td>0.63</td>\n",
       "      <td>NCCCCNCCCN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>460</th>\n",
       "      <td>12</td>\n",
       "      <td>STEVIOL</td>\n",
       "      <td>QFVOYBUQQBFCRH-VQSWZGCSSA-N</td>\n",
       "      <td>QFVOYBUQQBFCRH</td>\n",
       "      <td>C[C@@]12CCC[C@@]([C@H]1CC[C@]34[C@H]2CC[C@](C3...</td>\n",
       "      <td>11.15</td>\n",
       "      <td>C=C1CC23CCC4C(C)(C(=O)O)CCCC4(C)C2CCC1(O)C3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>461</th>\n",
       "      <td>12</td>\n",
       "      <td>STIGMASTEROL</td>\n",
       "      <td>HCXVJBMSMIARIN-PHZDYDNGSA-N</td>\n",
       "      <td>HCXVJBMSMIARIN</td>\n",
       "      <td>CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CCC(C=CC(C)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C)C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>462</th>\n",
       "      <td>12</td>\n",
       "      <td>TODDALOLACTONE</td>\n",
       "      <td>GLWPLQBQHWYKRK-UHFFFAOYSA-N</td>\n",
       "      <td>GLWPLQBQHWYKRK</td>\n",
       "      <td>CC(C)(C(CC1=C(C=C2C(=C1OC)C=CC(=O)O2)OC)O)O</td>\n",
       "      <td>6.60</td>\n",
       "      <td>COc1cc2oc(=O)ccc2c(OC)c1CC(O)C(C)(C)O</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>463</th>\n",
       "      <td>12</td>\n",
       "      <td>TOMATINE</td>\n",
       "      <td>REJLGAUYTKNVJM-UHFFFAOYSA-N</td>\n",
       "      <td>REJLGAUYTKNVJM</td>\n",
       "      <td>CC1CCC2(C(C3C(O2)CC4C3(CCC5C4CCC6C5(CCC(C6)OC7...</td>\n",
       "      <td>6.39</td>\n",
       "      <td>CC1CCC2(NC1)OC1CC3C4CCC5CC(OC6OC(CO)C(OC7OC(CO...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>464 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Mix                                               Name  \\\n",
       "0      1  3ALPHA-HYDROXY-3-DEOXYANGOLENSIC ACID METHYL E...   \n",
       "1      1                                3-AMINO-BETA-PINENE   \n",
       "2      1         3BETA-HYDROXY-23,24-BISNORCHOL-5-ENIC ACID   \n",
       "3      1                                  3-HYDROXYTYRAMINE   \n",
       "4      1                                  AVOCADYNE ACETATE   \n",
       "..   ...                                                ...   \n",
       "459   12                                         SPERMIDINE   \n",
       "460   12                                            STEVIOL   \n",
       "461   12                                       STIGMASTEROL   \n",
       "462   12                                     TODDALOLACTONE   \n",
       "463   12                                           TOMATINE   \n",
       "\n",
       "                        InchiKey         ik_MoNA  \\\n",
       "0    QFRUZVNPYYYLAN-BBJYNIMOSA-N  QFRUZVNPYYYLAN   \n",
       "1    SQSDBXYJKLVZJR-UHFFFAOYSA-N  SQSDBXYJKLVZJR   \n",
       "2    NPBNRBWMDNZEBN-YTEKVJICSA-N  NPBNRBWMDNZEBN   \n",
       "3    VYFYYTLLBUKUHU-UHFFFAOYSA-N  VYFYYTLLBUKUHU   \n",
       "4    JAKAZHIACKJNNB-UHFFFAOYSA-N  JAKAZHIACKJNNB   \n",
       "..                           ...             ...   \n",
       "459  ATHGHQPFGPMSJY-UHFFFAOYSA-N  ATHGHQPFGPMSJY   \n",
       "460  QFVOYBUQQBFCRH-VQSWZGCSSA-N  QFVOYBUQQBFCRH   \n",
       "461  HCXVJBMSMIARIN-PHZDYDNGSA-N  HCXVJBMSMIARIN   \n",
       "462  GLWPLQBQHWYKRK-UHFFFAOYSA-N  GLWPLQBQHWYKRK   \n",
       "463  REJLGAUYTKNVJM-UHFFFAOYSA-N  REJLGAUYTKNVJM   \n",
       "\n",
       "                                                SMILES     RT  \\\n",
       "0    C[C@@]12CCC3C(=C)[C@]1(CC(=O)O[C@H]2C4=COC=C4)...  10.38   \n",
       "1                            CC1(C2CC1C(=C)C(C2)N)C.Cl   5.00   \n",
       "2    CC([C@H]1CCC2[C@@]1(CCC3C2CC=C4[C@@]3(CC[C@@H]...  13.75   \n",
       "3                                 C1=CC(=C(C=C1CCN)O)O  11.47   \n",
       "4                      CC(=O)OCC(CC(CCCCCCCCCCCC#C)O)O  11.12   \n",
       "..                                                 ...    ...   \n",
       "459                                       C(CCNCCCN)CN   0.63   \n",
       "460  C[C@@]12CCC[C@@]([C@H]1CC[C@]34[C@H]2CC[C@](C3...  11.15   \n",
       "461  CC[C@H](/C=C/[C@@H](C)[C@H]1CC[C@@H]2[C@@]1(CC...    NaN   \n",
       "462        CC(C)(C(CC1=C(C=C2C(=C1OC)C=CC(=O)O2)OC)O)O   6.60   \n",
       "463  CC1CCC2(C(C3C(O2)CC4C3(CCC5C4CCC6C5(CCC(C6)OC7...   6.39   \n",
       "\n",
       "                                               CSMILES  \n",
       "0    C=C1C2CCC3(C)C(c4ccoc4)OC(=O)CC13OC1CC(O)C(C)(...  \n",
       "1                                 C=C1C(N)CC2CC1C2(C)C  \n",
       "2          CC(C(=O)O)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C  \n",
       "3                                    NCCc1ccc(O)c(O)c1  \n",
       "4                      C#CCCCCCCCCCCCC(O)CC(O)COC(C)=O  \n",
       "..                                                 ...  \n",
       "459                                         NCCCCNCCCN  \n",
       "460        C=C1CC23CCC4C(C)(C(=O)O)CCCC4(C)C2CCC1(O)C3  \n",
       "461  CCC(C=CC(C)C1CCC2C3CC=C4CC(O)CCC4(C)C3CCC12C)C...  \n",
       "462              COc1cc2oc(=O)ccc2c(OC)c1CC(O)C(C)(C)O  \n",
       "463  CC1CCC2(NC1)OC1CC3C4CCC5CC(OC6OC(CO)C(OC7OC(CO...  \n",
       "\n",
       "[464 rows x 7 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_substrates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc14abaa",
   "metadata": {},
   "source": [
    "## Compute expected m/z for substrates"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8837c4f7",
   "metadata": {},
   "source": [
    "We consider that the substrate can form different adducts (https://fiehnlab.ucdavis.edu/staff/kind/metabolomics/ms-adduct-calculator/).\n",
    "\n",
    "Also note that we will override any precursor m/z in MoNA, assuming that the type of adduct does not affect significantly the fragmentation pattern in MS2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c300b3e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Adapted from https://stackoverflow.com/questions/30334385/display-svg-in-ipython-notebook-from-a-function\n",
    "    \n",
    "def moltosvg(mol, molSize = (300,300), kekulize = False):\n",
    "    mc = Chem.Mol(mol.ToBinary())\n",
    "    if kekulize:\n",
    "        try:\n",
    "            Chem.Kekulize(mc)\n",
    "        except:\n",
    "            mc = Chem.Mol(mol.ToBinary())\n",
    "    if not mc.GetNumConformers():\n",
    "        rdDepictor.Compute2DCoords(mc)\n",
    "    drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])\n",
    "    drawer.DrawMolecule(mc)\n",
    "    drawer.FinishDrawing()\n",
    "    svg = drawer.GetDrawingText()\n",
    "    return SVG(svg.replace('svg:',''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "fab7adab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Positive charge (+1) in COc1cc2c3cc1Oc1cc(ccc1O)CC1c4c(cc(OC)c(O)c4Oc4ccc(cc4)CC3N(C)CC2)CC[N+]1(C)C\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:rdkit=\"http://www.rdkit.org/xml\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" baseProfile=\"full\" xml:space=\"preserve\" width=\"300px\" height=\"300px\" viewBox=\"0 0 300 300\">\n",
       "<!-- END OF HEADER -->\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"300.0\" height=\"300.0\" x=\"0.0\" y=\"0.0\"> </rect>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 285.0,109.3 L 275.0,109.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 275.0,109.5 L 265.1,109.7\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1 atom-1 atom-2\" d=\"M 258.8,114.0 L 254.2,122.3\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1 atom-1 atom-2\" d=\"M 254.2,122.3 L 249.7,130.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 249.7,130.6 L 262.0,151.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 247.6,134.2 L 257.9,151.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 262.0,151.0 L 253.0,176.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 253.0,176.0 L 229.2,177.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 250.4,172.6 L 233.3,173.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 229.2,177.8 L 231.7,154.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-6 atom-7\" d=\"M 228.0,154.3 L 225.9,131.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-6 atom-7\" d=\"M 231.7,154.1 L 230.5,134.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7 atom-7 atom-8\" d=\"M 225.9,131.0 L 220.7,123.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7 atom-7 atom-8\" d=\"M 220.7,123.3 L 215.4,115.6\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 208.6,108.5 L 200.9,102.9\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 200.9,102.9 L 193.2,97.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9 atom-9 atom-10\" d=\"M 190.1,92.6 L 170.3,90.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9 atom-9 atom-10\" d=\"M 193.2,97.3 L 170.0,94.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-10 atom-10 atom-11\" d=\"M 170.3,90.8 L 146.6,92.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11 atom-11 atom-12\" d=\"M 146.6,92.5 L 154.7,69.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11 atom-11 atom-12\" d=\"M 151.9,88.5 L 157.6,72.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12 atom-12 atom-13\" d=\"M 154.7,69.4 L 176.4,59.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-13 atom-13 atom-14\" d=\"M 176.4,59.7 L 195.7,73.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-13 atom-13 atom-14\" d=\"M 176.0,63.9 L 191.8,75.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-14 atom-14 atom-15\" d=\"M 195.7,73.7 L 204.6,69.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-14 atom-14 atom-15\" d=\"M 204.6,69.7 L 213.5,65.7\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-15 atom-11 atom-16\" d=\"M 146.6,92.5 L 124.9,102.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 124.9,102.3 L 107.8,118.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-17 atom-17 atom-18\" d=\"M 107.8,118.8 L 97.4,140.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-18 atom-18 atom-19\" d=\"M 97.4,140.3 L 73.7,142.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-18 atom-18 atom-19\" d=\"M 93.3,144.2 L 74.4,145.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-19 atom-19 atom-20\" d=\"M 73.7,142.0 L 49.2,150.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 49.2,150.9 L 55.0,174.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 53.5,153.2 L 58.0,171.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 55.0,174.0 L 48.4,180.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 48.4,180.4 L 41.8,186.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-22 atom-22 atom-23\" d=\"M 34.0,189.5 L 24.5,186.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-22 atom-22 atom-23\" d=\"M 24.5,186.8 L 15.0,184.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-23 atom-21 atom-24\" d=\"M 55.0,174.0 L 77.9,180.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 77.9,180.5 L 80.3,189.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 80.3,189.9 L 82.6,199.4\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-25 atom-24 atom-26\" d=\"M 77.9,180.5 L 95.0,163.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-25 atom-24 atom-26\" d=\"M 76.9,176.4 L 91.5,162.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 95.0,163.9 L 97.3,173.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 97.3,173.4 L 99.7,182.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-27 atom-27 atom-28\" d=\"M 103.7,191.3 L 108.9,199.0\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-27 atom-27 atom-28\" d=\"M 108.9,199.0 L 114.1,206.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 114.1,206.7 L 119.9,229.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 118.2,207.9 L 123.0,226.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-29 atom-29 atom-30\" d=\"M 119.9,229.8 L 142.8,236.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-30 atom-30 atom-31\" d=\"M 142.8,236.3 L 159.9,219.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-30 atom-30 atom-31\" d=\"M 141.8,232.2 L 155.9,218.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-31 atom-31 atom-32\" d=\"M 159.9,219.7 L 154.1,196.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-32 atom-32 atom-33\" d=\"M 154.1,196.7 L 131.2,190.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-32 atom-32 atom-33\" d=\"M 151.1,199.6 L 132.2,194.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-33 atom-31 atom-34\" d=\"M 159.9,219.7 L 201.8,215.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-34 atom-34 atom-35\" d=\"M 201.8,215.7 L 218.9,199.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-35 atom-35 atom-36\" d=\"M 218.9,199.2 L 224.1,206.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-35 atom-35 atom-36\" d=\"M 224.1,206.9 L 229.4,214.7\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-36 atom-36 atom-37\" d=\"M 230.2,223.0 L 226.0,231.7\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-36 atom-36 atom-37\" d=\"M 226.0,231.7 L 221.8,240.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-37 atom-36 atom-38\" d=\"M 235.6,218.6 L 245.8,217.9\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-37 atom-36 atom-38\" d=\"M 245.8,217.9 L 256.0,217.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-38 atom-38 atom-39\" d=\"M 256.0,217.2 L 266.3,195.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-39 atom-19 atom-40\" d=\"M 73.7,142.0 L 60.3,122.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-40 atom-40 atom-41\" d=\"M 60.3,122.3 L 70.7,100.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-41 atom-41 atom-42\" d=\"M 70.7,100.8 L 80.9,100.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-41 atom-41 atom-42\" d=\"M 80.9,100.1 L 91.1,99.4\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-42 atom-42 atom-43\" d=\"M 102.0,95.7 L 109.1,92.6\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-42 atom-42 atom-43\" d=\"M 109.1,92.6 L 116.2,89.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-43 atom-42 atom-44\" d=\"M 93.4,95.0 L 91.0,85.5\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-43 atom-42 atom-44\" d=\"M 91.0,85.5 L 88.6,76.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-44 atom-7 atom-2\" d=\"M 225.9,131.0 L 249.7,130.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-45 atom-14 atom-9\" d=\"M 195.7,73.7 L 193.2,97.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-46 atom-42 atom-17\" d=\"M 97.3,103.3 L 102.5,111.1\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-46 atom-42 atom-17\" d=\"M 102.5,111.1 L 107.8,118.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-47 atom-39 atom-4\" d=\"M 266.3,195.7 L 253.0,176.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-48 atom-35 atom-5\" d=\"M 218.9,199.2 L 229.2,177.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-49 atom-26 atom-18\" d=\"M 95.0,163.9 L 97.4,140.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-50 atom-33 atom-28\" d=\"M 131.2,190.1 L 114.1,206.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path d=\"M 261.3,149.9 L 262.0,151.0 L 261.5,152.2\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 231.6,155.3 L 231.7,154.1 L 231.6,153.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 171.3,90.9 L 170.3,90.8 L 169.2,90.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 154.3,70.6 L 154.7,69.4 L 155.7,69.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 175.3,60.2 L 176.4,59.7 L 177.4,60.4\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 126.0,101.8 L 124.9,102.3 L 124.0,103.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 50.4,150.5 L 49.2,150.9 L 49.5,152.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 119.6,228.7 L 119.9,229.8 L 121.1,230.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 141.7,236.0 L 142.8,236.3 L 143.7,235.5\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 154.4,197.8 L 154.1,196.7 L 153.0,196.3\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 132.4,190.5 L 131.2,190.1 L 130.4,191.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 199.7,215.9 L 201.8,215.7 L 202.6,214.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 255.4,217.2 L 256.0,217.2 L 256.5,216.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 265.8,196.8 L 266.3,195.7 L 265.7,194.8\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 61.0,123.2 L 60.3,122.3 L 60.8,121.2\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 70.2,101.9 L 70.7,100.8 L 71.2,100.8\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path class=\"atom-1\" d=\"M 258.1 109.8 Q 258.1 108.1, 258.9 107.2 Q 259.7 106.3, 261.2 106.3 Q 262.7 106.3, 263.5 107.2 Q 264.3 108.1, 264.3 109.8 Q 264.3 111.4, 263.5 112.3 Q 262.7 113.2, 261.2 113.2 Q 259.7 113.2, 258.9 112.3 Q 258.1 111.4, 258.1 109.8 M 261.2 112.5 Q 262.2 112.5, 262.8 111.8 Q 263.3 111.1, 263.3 109.8 Q 263.3 108.4, 262.8 107.8 Q 262.2 107.1, 261.2 107.1 Q 260.2 107.1, 259.6 107.8 Q 259.1 108.4, 259.1 109.8 Q 259.1 111.1, 259.6 111.8 Q 260.2 112.5, 261.2 112.5 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-8\" d=\"M 209.4 111.3 Q 209.4 109.7, 210.2 108.8 Q 211.0 107.9, 212.5 107.9 Q 214.0 107.9, 214.8 108.8 Q 215.6 109.7, 215.6 111.3 Q 215.6 112.9, 214.8 113.9 Q 214.0 114.8, 212.5 114.8 Q 211.0 114.8, 210.2 113.9 Q 209.4 113.0, 209.4 111.3 M 212.5 114.0 Q 213.6 114.0, 214.1 113.4 Q 214.7 112.7, 214.7 111.3 Q 214.7 110.0, 214.1 109.3 Q 213.6 108.6, 212.5 108.6 Q 211.5 108.6, 210.9 109.3 Q 210.4 110.0, 210.4 111.3 Q 210.4 112.7, 210.9 113.4 Q 211.5 114.0, 212.5 114.0 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-15\" d=\"M 214.3 63.9 Q 214.3 62.3, 215.1 61.4 Q 215.9 60.5, 217.4 60.5 Q 218.9 60.5, 219.7 61.4 Q 220.5 62.3, 220.5 63.9 Q 220.5 65.6, 219.7 66.5 Q 218.9 67.4, 217.4 67.4 Q 215.9 67.4, 215.1 66.5 Q 214.3 65.6, 214.3 63.9 M 217.4 66.7 Q 218.4 66.7, 219.0 66.0 Q 219.5 65.3, 219.5 63.9 Q 219.5 62.6, 219.0 62.0 Q 218.4 61.3, 217.4 61.3 Q 216.4 61.3, 215.8 61.9 Q 215.3 62.6, 215.3 63.9 Q 215.3 65.3, 215.8 66.0 Q 216.4 66.7, 217.4 66.7 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-15\" d=\"M 221.5 60.6 L 222.5 60.6 L 222.5 63.5 L 225.9 63.5 L 225.9 60.6 L 226.8 60.6 L 226.8 67.3 L 225.9 67.3 L 225.9 64.2 L 222.5 64.2 L 222.5 67.3 L 221.5 67.3 L 221.5 60.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-22\" d=\"M 34.8 190.6 Q 34.8 189.0, 35.6 188.1 Q 36.4 187.2, 37.9 187.2 Q 39.4 187.2, 40.2 188.1 Q 41.0 189.0, 41.0 190.6 Q 41.0 192.2, 40.2 193.2 Q 39.4 194.1, 37.9 194.1 Q 36.4 194.1, 35.6 193.2 Q 34.8 192.2, 34.8 190.6 M 37.9 193.3 Q 38.9 193.3, 39.5 192.6 Q 40.0 191.9, 40.0 190.6 Q 40.0 189.3, 39.5 188.6 Q 38.9 187.9, 37.9 187.9 Q 36.9 187.9, 36.3 188.6 Q 35.8 189.3, 35.8 190.6 Q 35.8 191.9, 36.3 192.6 Q 36.9 193.3, 37.9 193.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-25\" d=\"M 80.6 203.6 Q 80.6 202.0, 81.4 201.1 Q 82.2 200.2, 83.7 200.2 Q 85.2 200.2, 86.0 201.1 Q 86.8 202.0, 86.8 203.6 Q 86.8 205.2, 86.0 206.2 Q 85.2 207.1, 83.7 207.1 Q 82.2 207.1, 81.4 206.2 Q 80.6 205.3, 80.6 203.6 M 83.7 206.3 Q 84.7 206.3, 85.3 205.7 Q 85.8 205.0, 85.8 203.6 Q 85.8 202.3, 85.3 201.6 Q 84.7 200.9, 83.7 200.9 Q 82.7 200.9, 82.1 201.6 Q 81.5 202.3, 81.5 203.6 Q 81.5 205.0, 82.1 205.7 Q 82.7 206.3, 83.7 206.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-25\" d=\"M 87.8 200.3 L 88.7 200.3 L 88.7 203.1 L 92.2 203.1 L 92.2 200.3 L 93.1 200.3 L 93.1 207.0 L 92.2 207.0 L 92.2 203.9 L 88.7 203.9 L 88.7 207.0 L 87.8 207.0 L 87.8 200.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-27\" d=\"M 97.7 187.0 Q 97.7 185.4, 98.5 184.5 Q 99.3 183.6, 100.8 183.6 Q 102.3 183.6, 103.1 184.5 Q 103.9 185.4, 103.9 187.0 Q 103.9 188.7, 103.1 189.6 Q 102.2 190.5, 100.8 190.5 Q 99.3 190.5, 98.5 189.6 Q 97.7 188.7, 97.7 187.0 M 100.8 189.8 Q 101.8 189.8, 102.4 189.1 Q 102.9 188.4, 102.9 187.0 Q 102.9 185.7, 102.4 185.0 Q 101.8 184.4, 100.8 184.4 Q 99.7 184.4, 99.2 185.0 Q 98.6 185.7, 98.6 187.0 Q 98.6 188.4, 99.2 189.1 Q 99.7 189.8, 100.8 189.8 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-36\" d=\"M 230.7 215.5 L 232.9 219.1 Q 233.2 219.4, 233.5 220.1 Q 233.9 220.7, 233.9 220.7 L 233.9 215.5 L 234.8 215.5 L 234.8 222.2 L 233.8 222.2 L 231.5 218.3 Q 231.2 217.9, 230.9 217.4 Q 230.6 216.8, 230.5 216.7 L 230.5 222.2 L 229.7 222.2 L 229.7 215.5 L 230.7 215.5 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-42\" d=\"M 92.9 95.8 L 95.2 99.3 Q 95.4 99.7, 95.7 100.3 Q 96.1 101.0, 96.1 101.0 L 96.1 95.8 L 97.0 95.8 L 97.0 102.5 L 96.1 102.5 L 93.7 98.6 Q 93.4 98.1, 93.1 97.6 Q 92.8 97.1, 92.8 96.9 L 92.8 102.5 L 91.9 102.5 L 91.9 95.8 L 92.9 95.8 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-42\" d=\"M 98.3 97.0 L 99.4 97.0 L 99.4 95.7 L 100.0 95.7 L 100.0 97.0 L 101.2 97.0 L 101.2 97.4 L 100.0 97.4 L 100.0 98.7 L 99.4 98.7 L 99.4 97.4 L 98.3 97.4 L 98.3 97.0 \" fill=\"#0000FF\"/>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Positive charge (+1) in C=C1CCCC(C)(C)C1CCC(C)=CCCC(C)=CCn1c[n+](C)c2ncnc(N)c21\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:rdkit=\"http://www.rdkit.org/xml\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" baseProfile=\"full\" xml:space=\"preserve\" width=\"300px\" height=\"300px\" viewBox=\"0 0 300 300\">\n",
       "<!-- END OF HEADER -->\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"300.0\" height=\"300.0\" x=\"0.0\" y=\"0.0\"> </rect>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 86.1,179.9 L 62.9,175.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 86.7,176.6 L 63.5,172.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1 atom-1 atom-2\" d=\"M 64.2,174.1 L 49.5,191.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 49.5,191.3 L 27.2,187.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 27.2,187.1 L 19.7,165.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 19.7,165.8 L 34.5,148.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 34.5,148.6 L 15.0,137.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-5 atom-7\" d=\"M 34.5,148.6 L 42.4,127.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7 atom-5 atom-8\" d=\"M 34.5,148.6 L 56.7,152.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 56.7,152.8 L 71.4,135.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9 atom-9 atom-10\" d=\"M 71.4,135.6 L 93.6,139.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-10 atom-10 atom-11\" d=\"M 93.6,139.8 L 108.4,122.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11 atom-11 atom-12\" d=\"M 108.4,122.6 L 100.8,101.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12 atom-11 atom-13\" d=\"M 108.4,122.6 L 130.6,126.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12 atom-11 atom-13\" d=\"M 109.7,126.3 L 128.0,129.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-13 atom-13 atom-14\" d=\"M 130.6,126.8 L 138.1,148.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-14 atom-14 atom-15\" d=\"M 138.1,148.1 L 160.3,152.3\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-15 atom-15 atom-16\" d=\"M 160.3,152.3 L 167.8,173.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 167.8,173.6 L 153.1,190.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-17 atom-16 atom-18\" d=\"M 167.8,173.6 L 190.0,177.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-17 atom-16 atom-18\" d=\"M 170.4,170.6 L 188.7,174.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-18 atom-18 atom-19\" d=\"M 190.0,177.8 L 204.8,160.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-19 atom-19 atom-20\" d=\"M 204.8,160.6 L 214.3,162.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-19 atom-19 atom-20\" d=\"M 214.3,162.4 L 223.8,164.2\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 228.9,168.7 L 232.8,177.0\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 232.8,177.0 L 236.7,185.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 236.7,185.2 L 246.3,183.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 246.3,183.9 L 255.9,182.7\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 238.7,181.5 L 247.1,180.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 247.1,180.4 L 255.5,179.3\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-22 atom-22 atom-23\" d=\"M 262.3,185.6 L 268.5,192.2\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-22 atom-22 atom-23\" d=\"M 268.5,192.2 L 274.6,198.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-23 atom-22 atom-24\" d=\"M 259.8,178.3 L 261.6,169.2\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-23 atom-22 atom-24\" d=\"M 261.6,169.2 L 263.3,160.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 263.3,160.1 L 271.3,155.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 271.3,155.1 L 279.4,150.2\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 263.2,156.1 L 270.4,151.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-24 atom-25\" d=\"M 270.4,151.7 L 277.6,147.3\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-25 atom-25 atom-26\" d=\"M 282.5,144.3 L 282.3,135.0\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-25 atom-25 atom-26\" d=\"M 282.3,135.0 L 282.0,125.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 282.0,125.7 L 273.7,121.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 273.7,121.1 L 265.3,116.6\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 278.7,127.7 L 271.2,123.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 271.2,123.6 L 263.7,119.6\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-27 atom-27 atom-28\" d=\"M 259.0,116.8 L 250.9,121.7\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-27 atom-27 atom-28\" d=\"M 250.9,121.7 L 242.9,126.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 242.9,126.6 L 234.5,122.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 234.5,122.1 L 226.2,117.5\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-29 atom-28 atom-30\" d=\"M 242.9,126.6 L 243.4,149.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-29 atom-28 atom-30\" d=\"M 246.3,128.5 L 246.8,147.2\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-30 atom-8 atom-1\" d=\"M 56.7,152.8 L 64.2,174.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-31 atom-30 atom-20\" d=\"M 243.4,149.2 L 236.8,155.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-31 atom-30 atom-20\" d=\"M 236.8,155.5 L 230.2,161.8\" style=\"fill:none;fill-rule:evenodd;stroke:#0000FF;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-32 atom-30 atom-24\" d=\"M 243.4,149.2 L 263.3,160.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path d=\"M 63.5,175.0 L 64.2,174.1 L 63.8,173.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 50.2,190.4 L 49.5,191.3 L 48.4,191.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 28.4,187.3 L 27.2,187.1 L 26.9,186.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 20.1,166.8 L 19.7,165.8 L 20.5,164.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 70.7,136.5 L 71.4,135.6 L 72.5,135.8\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 92.5,139.6 L 93.6,139.8 L 94.4,138.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 129.5,126.6 L 130.6,126.8 L 131.0,127.8\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 137.7,147.0 L 138.1,148.1 L 139.2,148.3\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 159.2,152.1 L 160.3,152.3 L 160.7,153.3\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 188.9,177.6 L 190.0,177.8 L 190.8,176.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 204.0,161.5 L 204.8,160.6 L 205.2,160.7\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 236.5,184.8 L 236.7,185.2 L 237.2,185.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 282.0,126.1 L 282.0,125.7 L 281.6,125.5\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path class=\"atom-20\" d=\"M 225.6 161.6 L 227.7 165.0 Q 227.9 165.3, 228.2 165.9 Q 228.5 166.5, 228.6 166.5 L 228.6 161.6 L 229.4 161.6 L 229.4 168.0 L 228.5 168.0 L 226.3 164.3 Q 226.0 163.8, 225.7 163.3 Q 225.5 162.8, 225.4 162.7 L 225.4 168.0 L 224.6 168.0 L 224.6 161.6 L 225.6 161.6 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-22\" d=\"M 257.7 179.1 L 259.8 182.5 Q 260.0 182.8, 260.3 183.4 Q 260.7 184.0, 260.7 184.1 L 260.7 179.1 L 261.5 179.1 L 261.5 185.5 L 260.7 185.5 L 258.4 181.8 Q 258.1 181.3, 257.9 180.8 Q 257.6 180.4, 257.5 180.2 L 257.5 185.5 L 256.7 185.5 L 256.7 179.1 L 257.7 179.1 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-22\" d=\"M 262.7 180.2 L 263.9 180.2 L 263.9 179.1 L 264.4 179.1 L 264.4 180.2 L 265.5 180.2 L 265.5 180.7 L 264.4 180.7 L 264.4 181.9 L 263.9 181.9 L 263.9 180.7 L 262.7 180.7 L 262.7 180.2 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-25\" d=\"M 281.2 145.1 L 283.3 148.5 Q 283.5 148.8, 283.8 149.4 Q 284.1 150.0, 284.1 150.1 L 284.1 145.1 L 285.0 145.1 L 285.0 151.5 L 284.1 151.5 L 281.9 147.8 Q 281.6 147.3, 281.3 146.8 Q 281.1 146.3, 281.0 146.2 L 281.0 151.5 L 280.1 151.5 L 280.1 145.1 L 281.2 145.1 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-27\" d=\"M 260.8 111.6 L 262.9 115.0 Q 263.1 115.4, 263.4 116.0 Q 263.7 116.6, 263.8 116.6 L 263.8 111.6 L 264.6 111.6 L 264.6 118.1 L 263.7 118.1 L 261.5 114.3 Q 261.2 113.9, 260.9 113.4 Q 260.7 112.9, 260.6 112.8 L 260.6 118.1 L 259.7 118.1 L 259.7 111.6 L 260.8 111.6 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-29\" d=\"M 210.7 112.6 L 211.5 112.6 L 211.5 115.3 L 214.8 115.3 L 214.8 112.6 L 215.7 112.6 L 215.7 119.0 L 214.8 119.0 L 214.8 116.0 L 211.5 116.0 L 211.5 119.0 L 210.7 119.0 L 210.7 112.6 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-29\" d=\"M 216.9 118.8 Q 217.1 118.4, 217.4 118.2 Q 217.8 117.9, 218.3 117.9 Q 219.0 117.9, 219.3 118.3 Q 219.7 118.6, 219.7 119.2 Q 219.7 119.9, 219.2 120.4 Q 218.8 121.0, 217.8 121.7 L 219.8 121.7 L 219.8 122.2 L 216.9 122.2 L 216.9 121.8 Q 217.7 121.2, 218.2 120.8 Q 218.6 120.4, 218.9 120.0 Q 219.1 119.7, 219.1 119.3 Q 219.1 118.9, 218.9 118.6 Q 218.7 118.4, 218.3 118.4 Q 218.0 118.4, 217.8 118.5 Q 217.5 118.7, 217.4 119.0 L 216.9 118.8 \" fill=\"#0000FF\"/>\n",
       "<path class=\"atom-29\" d=\"M 221.6 112.6 L 223.7 116.0 Q 223.9 116.3, 224.2 116.9 Q 224.6 117.5, 224.6 117.6 L 224.6 112.6 L 225.4 112.6 L 225.4 119.0 L 224.6 119.0 L 222.3 115.3 Q 222.1 114.9, 221.8 114.4 Q 221.5 113.9, 221.4 113.7 L 221.4 119.0 L 220.6 119.0 L 220.6 112.6 L 221.6 112.6 \" fill=\"#0000FF\"/>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Positive charge (+1) in OCC1OC(Oc2cc3c(O)cc(O)cc3[o+]c2-c2ccc(O)c(O)c2)C(O)C(O)C1O\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:rdkit=\"http://www.rdkit.org/xml\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" baseProfile=\"full\" xml:space=\"preserve\" width=\"300px\" height=\"300px\" viewBox=\"0 0 300 300\">\n",
       "<!-- END OF HEADER -->\n",
       "<rect style=\"opacity:1.0;fill:#FFFFFF;stroke:none\" width=\"300.0\" height=\"300.0\" x=\"0.0\" y=\"0.0\"> </rect>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 269.2,156.1 L 257.4,154.9\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-0 atom-0 atom-1\" d=\"M 257.4,154.9 L 245.6,153.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-1 atom-1 atom-2\" d=\"M 245.6,153.8 L 229.0,176.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 229.0,176.8 L 217.2,175.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-2 atom-2 atom-3\" d=\"M 217.2,175.6 L 205.4,174.5\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 197.1,179.1 L 190.7,188.1\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-3 atom-3 atom-4\" d=\"M 190.7,188.1 L 184.3,197.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 184.3,197.1 L 172.5,195.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-4 atom-4 atom-5\" d=\"M 172.5,195.9 L 160.7,194.7\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 153.8,189.3 L 149.1,178.9\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-5 atom-5 atom-6\" d=\"M 149.1,178.9 L 144.4,168.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-6 atom-7\" d=\"M 144.4,168.4 L 160.9,145.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-6 atom-6 atom-7\" d=\"M 142.3,164.0 L 156.0,144.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-7 atom-7 atom-8\" d=\"M 160.9,145.4 L 149.2,119.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 149.2,119.6 L 165.7,96.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-8 atom-8 atom-9\" d=\"M 147.2,115.1 L 160.8,96.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9 atom-9 atom-10\" d=\"M 165.7,96.5 L 177.5,97.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-9 atom-9 atom-10\" d=\"M 177.5,97.7 L 189.3,98.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-10 atom-9 atom-11\" d=\"M 165.7,96.5 L 154.0,70.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11 atom-11 atom-12\" d=\"M 154.0,70.7 L 125.8,67.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-11 atom-11 atom-12\" d=\"M 151.2,74.7 L 127.8,72.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12 atom-12 atom-13\" d=\"M 125.8,67.9 L 121.1,57.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-12 atom-12 atom-13\" d=\"M 121.1,57.5 L 116.4,47.2\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-13 atom-12 atom-14\" d=\"M 125.8,67.9 L 109.3,90.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-14 atom-14 atom-15\" d=\"M 109.3,90.9 L 121.0,116.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-14 atom-14 atom-15\" d=\"M 114.2,91.4 L 123.8,112.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-15 atom-15 atom-16\" d=\"M 121.0,116.8 L 114.5,125.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-15 atom-15 atom-16\" d=\"M 114.5,125.8 L 108.0,134.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 106.8,144.9 L 111.4,155.3\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 111.4,155.3 L 116.1,165.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 110.6,143.2 L 114.8,152.4\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-16 atom-16 atom-17\" d=\"M 114.8,152.4 L 119.0,161.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-17 atom-17 atom-18\" d=\"M 116.1,165.6 L 99.6,188.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-18 atom-18 atom-19\" d=\"M 99.6,188.7 L 111.3,214.5\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-18 atom-18 atom-19\" d=\"M 96.7,192.7 L 106.4,214.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-19 atom-19 atom-20\" d=\"M 111.3,214.5 L 94.8,237.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 94.8,237.6 L 66.5,234.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-20 atom-20 atom-21\" d=\"M 92.7,233.1 L 69.4,230.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 66.5,234.8 L 60.1,243.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-21 atom-21 atom-22\" d=\"M 60.1,243.8 L 53.6,252.8\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-22 atom-21 atom-23\" d=\"M 66.5,234.8 L 54.8,209.0\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-23 atom-23 atom-24\" d=\"M 54.8,209.0 L 43.0,207.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-23 atom-23 atom-24\" d=\"M 43.0,207.8 L 31.2,206.6\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-23 atom-25\" d=\"M 54.8,209.0 L 71.4,185.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-24 atom-23 atom-25\" d=\"M 59.7,209.4 L 73.4,190.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-25 atom-4 atom-26\" d=\"M 184.3,197.1 L 196.0,222.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 196.0,222.9 L 189.5,231.9\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-26 atom-26 atom-27\" d=\"M 189.5,231.9 L 183.0,241.0\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-27 atom-26 atom-28\" d=\"M 196.0,222.9 L 224.2,225.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 224.2,225.7 L 228.9,236.1\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-28 atom-28 atom-29\" d=\"M 228.9,236.1 L 233.6,246.5\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-29 atom-28 atom-30\" d=\"M 224.2,225.7 L 240.7,202.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-30 atom-30 atom-31\" d=\"M 240.7,202.7 L 252.5,203.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-30 atom-30 atom-31\" d=\"M 252.5,203.8 L 264.3,205.0\" style=\"fill:none;fill-rule:evenodd;stroke:#FF0000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-31 atom-30 atom-2\" d=\"M 240.7,202.7 L 229.0,176.8\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-32 atom-17 atom-6\" d=\"M 116.1,165.6 L 144.4,168.4\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-33 atom-25 atom-18\" d=\"M 71.4,185.9 L 99.6,188.7\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path class=\"bond-34 atom-15 atom-8\" d=\"M 121.0,116.8 L 149.2,119.6\" style=\"fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1\"/>\n",
       "<path d=\"M 246.1,153.8 L 245.6,153.8 L 244.7,154.9\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 160.1,146.5 L 160.9,145.4 L 160.3,144.1\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 154.6,72.0 L 154.0,70.7 L 152.6,70.5\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 110.1,89.8 L 109.3,90.9 L 109.9,92.2\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 110.7,213.2 L 111.3,214.5 L 110.5,215.7\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 95.6,236.4 L 94.8,237.6 L 93.4,237.4\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path d=\"M 70.5,187.1 L 71.4,185.9 L 72.8,186.0\" style=\"fill:none;stroke:#000000;stroke-width:2.0px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;\"/>\n",
       "<path class=\"atom-0\" d=\"M 270.1 156.6 Q 270.1 154.7, 271.0 153.6 Q 272.0 152.5, 273.8 152.5 Q 275.6 152.5, 276.5 153.6 Q 277.5 154.7, 277.5 156.6 Q 277.5 158.5, 276.5 159.6 Q 275.5 160.7, 273.8 160.7 Q 272.0 160.7, 271.0 159.6 Q 270.1 158.5, 270.1 156.6 M 273.8 159.8 Q 275.0 159.8, 275.7 159.0 Q 276.3 158.2, 276.3 156.6 Q 276.3 155.0, 275.7 154.2 Q 275.0 153.4, 273.8 153.4 Q 272.6 153.4, 271.9 154.2 Q 271.2 155.0, 271.2 156.6 Q 271.2 158.2, 271.9 159.0 Q 272.6 159.8, 273.8 159.8 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-0\" d=\"M 278.7 152.6 L 279.8 152.6 L 279.8 156.0 L 283.9 156.0 L 283.9 152.6 L 285.0 152.6 L 285.0 160.6 L 283.9 160.6 L 283.9 156.9 L 279.8 156.9 L 279.8 160.6 L 278.7 160.6 L 278.7 152.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-3\" d=\"M 197.1 174.0 Q 197.1 172.1, 198.1 171.0 Q 199.0 170.0, 200.8 170.0 Q 202.6 170.0, 203.5 171.0 Q 204.5 172.1, 204.5 174.0 Q 204.5 176.0, 203.5 177.1 Q 202.6 178.2, 200.8 178.2 Q 199.0 178.2, 198.1 177.1 Q 197.1 176.0, 197.1 174.0 M 200.8 177.3 Q 202.0 177.3, 202.7 176.5 Q 203.4 175.7, 203.4 174.0 Q 203.4 172.5, 202.7 171.7 Q 202.0 170.9, 200.8 170.9 Q 199.6 170.9, 198.9 171.7 Q 198.2 172.5, 198.2 174.0 Q 198.2 175.7, 198.9 176.5 Q 199.6 177.3, 200.8 177.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-5\" d=\"M 152.4 194.3 Q 152.4 192.4, 153.3 191.3 Q 154.3 190.2, 156.0 190.2 Q 157.8 190.2, 158.8 191.3 Q 159.7 192.4, 159.7 194.3 Q 159.7 196.3, 158.8 197.4 Q 157.8 198.5, 156.0 198.5 Q 154.3 198.5, 153.3 197.4 Q 152.4 196.3, 152.4 194.3 M 156.0 197.6 Q 157.3 197.6, 157.9 196.7 Q 158.6 195.9, 158.6 194.3 Q 158.6 192.7, 157.9 191.9 Q 157.3 191.1, 156.0 191.1 Q 154.8 191.1, 154.2 191.9 Q 153.5 192.7, 153.5 194.3 Q 153.5 195.9, 154.2 196.7 Q 154.8 197.6, 156.0 197.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-10\" d=\"M 190.3 99.3 Q 190.3 97.4, 191.2 96.3 Q 192.2 95.2, 193.9 95.2 Q 195.7 95.2, 196.7 96.3 Q 197.6 97.4, 197.6 99.3 Q 197.6 101.3, 196.7 102.4 Q 195.7 103.5, 193.9 103.5 Q 192.2 103.5, 191.2 102.4 Q 190.3 101.3, 190.3 99.3 M 193.9 102.6 Q 195.2 102.6, 195.8 101.8 Q 196.5 100.9, 196.5 99.3 Q 196.5 97.7, 195.8 96.9 Q 195.2 96.1, 193.9 96.1 Q 192.7 96.1, 192.0 96.9 Q 191.4 97.7, 191.4 99.3 Q 191.4 100.9, 192.0 101.8 Q 192.7 102.6, 193.9 102.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-10\" d=\"M 198.9 95.3 L 200.0 95.3 L 200.0 98.7 L 204.1 98.7 L 204.1 95.3 L 205.2 95.3 L 205.2 103.4 L 204.1 103.4 L 204.1 99.6 L 200.0 99.6 L 200.0 103.4 L 198.9 103.4 L 198.9 95.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-13\" d=\"M 102.5 38.1 L 103.6 38.1 L 103.6 41.5 L 107.7 41.5 L 107.7 38.1 L 108.8 38.1 L 108.8 46.1 L 107.7 46.1 L 107.7 42.4 L 103.6 42.4 L 103.6 46.1 L 102.5 46.1 L 102.5 38.1 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-13\" d=\"M 110.4 42.1 Q 110.4 40.1, 111.4 39.1 Q 112.3 38.0, 114.1 38.0 Q 115.9 38.0, 116.8 39.1 Q 117.8 40.1, 117.8 42.1 Q 117.8 44.0, 116.8 45.1 Q 115.9 46.2, 114.1 46.2 Q 112.3 46.2, 111.4 45.1 Q 110.4 44.0, 110.4 42.1 M 114.1 45.3 Q 115.3 45.3, 116.0 44.5 Q 116.7 43.7, 116.7 42.1 Q 116.7 40.5, 116.0 39.7 Q 115.3 38.9, 114.1 38.9 Q 112.9 38.9, 112.2 39.7 Q 111.5 40.5, 111.5 42.1 Q 111.5 43.7, 112.2 44.5 Q 112.9 45.3, 114.1 45.3 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-16\" d=\"M 100.7 139.8 Q 100.7 137.9, 101.7 136.8 Q 102.7 135.7, 104.4 135.7 Q 106.2 135.7, 107.2 136.8 Q 108.1 137.9, 108.1 139.8 Q 108.1 141.8, 107.2 142.9 Q 106.2 144.0, 104.4 144.0 Q 102.7 144.0, 101.7 142.9 Q 100.7 141.8, 100.7 139.8 M 104.4 143.1 Q 105.7 143.1, 106.3 142.3 Q 107.0 141.4, 107.0 139.8 Q 107.0 138.3, 106.3 137.5 Q 105.7 136.7, 104.4 136.7 Q 103.2 136.7, 102.5 137.4 Q 101.9 138.2, 101.9 139.8 Q 101.9 141.5, 102.5 142.3 Q 103.2 143.1, 104.4 143.1 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-16\" d=\"M 109.3 137.2 L 110.7 137.2 L 110.7 135.7 L 111.4 135.7 L 111.4 137.2 L 112.8 137.2 L 112.8 137.7 L 111.4 137.7 L 111.4 139.2 L 110.7 139.2 L 110.7 137.7 L 109.3 137.7 L 109.3 137.2 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-22\" d=\"M 38.4 253.9 L 39.5 253.9 L 39.5 257.3 L 43.6 257.3 L 43.6 253.9 L 44.7 253.9 L 44.7 261.9 L 43.6 261.9 L 43.6 258.2 L 39.5 258.2 L 39.5 261.9 L 38.4 261.9 L 38.4 253.9 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-22\" d=\"M 46.3 257.9 Q 46.3 255.9, 47.3 254.9 Q 48.2 253.8, 50.0 253.8 Q 51.8 253.8, 52.7 254.9 Q 53.7 255.9, 53.7 257.9 Q 53.7 259.8, 52.7 260.9 Q 51.8 262.0, 50.0 262.0 Q 48.2 262.0, 47.3 260.9 Q 46.3 259.8, 46.3 257.9 M 50.0 261.1 Q 51.2 261.1, 51.9 260.3 Q 52.6 259.5, 52.6 257.9 Q 52.6 256.3, 51.9 255.5 Q 51.2 254.7, 50.0 254.7 Q 48.8 254.7, 48.1 255.5 Q 47.5 256.3, 47.5 257.9 Q 47.5 259.5, 48.1 260.3 Q 48.8 261.1, 50.0 261.1 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-24\" d=\"M 15.0 202.2 L 16.1 202.2 L 16.1 205.6 L 20.2 205.6 L 20.2 202.2 L 21.3 202.2 L 21.3 210.2 L 20.2 210.2 L 20.2 206.5 L 16.1 206.5 L 16.1 210.2 L 15.0 210.2 L 15.0 202.2 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-24\" d=\"M 22.9 206.2 Q 22.9 204.3, 23.9 203.2 Q 24.8 202.1, 26.6 202.1 Q 28.4 202.1, 29.4 203.2 Q 30.3 204.3, 30.3 206.2 Q 30.3 208.1, 29.3 209.2 Q 28.4 210.3, 26.6 210.3 Q 24.8 210.3, 23.9 209.2 Q 22.9 208.1, 22.9 206.2 M 26.6 209.4 Q 27.8 209.4, 28.5 208.6 Q 29.2 207.8, 29.2 206.2 Q 29.2 204.6, 28.5 203.8 Q 27.8 203.0, 26.6 203.0 Q 25.4 203.0, 24.7 203.8 Q 24.1 204.6, 24.1 206.2 Q 24.1 207.8, 24.7 208.6 Q 25.4 209.4, 26.6 209.4 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-27\" d=\"M 167.8 242.0 L 168.9 242.0 L 168.9 245.4 L 173.0 245.4 L 173.0 242.0 L 174.1 242.0 L 174.1 250.0 L 173.0 250.0 L 173.0 246.3 L 168.9 246.3 L 168.9 250.0 L 167.8 250.0 L 167.8 242.0 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-27\" d=\"M 175.7 246.0 Q 175.7 244.0, 176.7 243.0 Q 177.7 241.9, 179.4 241.9 Q 181.2 241.9, 182.2 243.0 Q 183.1 244.0, 183.1 246.0 Q 183.1 247.9, 182.2 249.0 Q 181.2 250.1, 179.4 250.1 Q 177.7 250.1, 176.7 249.0 Q 175.7 247.9, 175.7 246.0 M 179.4 249.2 Q 180.7 249.2, 181.3 248.4 Q 182.0 247.6, 182.0 246.0 Q 182.0 244.4, 181.3 243.6 Q 180.7 242.8, 179.4 242.8 Q 178.2 242.8, 177.5 243.6 Q 176.9 244.4, 176.9 246.0 Q 176.9 247.6, 177.5 248.4 Q 178.2 249.2, 179.4 249.2 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-29\" d=\"M 232.2 251.6 Q 232.2 249.6, 233.2 248.6 Q 234.1 247.5, 235.9 247.5 Q 237.7 247.5, 238.6 248.6 Q 239.6 249.6, 239.6 251.6 Q 239.6 253.5, 238.6 254.6 Q 237.6 255.7, 235.9 255.7 Q 234.1 255.7, 233.2 254.6 Q 232.2 253.5, 232.2 251.6 M 235.9 254.8 Q 237.1 254.8, 237.8 254.0 Q 238.4 253.2, 238.4 251.6 Q 238.4 250.0, 237.8 249.2 Q 237.1 248.4, 235.9 248.4 Q 234.7 248.4, 234.0 249.2 Q 233.3 250.0, 233.3 251.6 Q 233.3 253.2, 234.0 254.0 Q 234.7 254.8, 235.9 254.8 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-29\" d=\"M 240.8 247.6 L 241.9 247.6 L 241.9 251.0 L 246.0 251.0 L 246.0 247.6 L 247.1 247.6 L 247.1 255.6 L 246.0 255.6 L 246.0 251.9 L 241.9 251.9 L 241.9 255.6 L 240.8 255.6 L 240.8 247.6 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-31\" d=\"M 265.3 205.5 Q 265.3 203.5, 266.2 202.5 Q 267.2 201.4, 268.9 201.4 Q 270.7 201.4, 271.7 202.5 Q 272.6 203.5, 272.6 205.5 Q 272.6 207.4, 271.7 208.5 Q 270.7 209.6, 268.9 209.6 Q 267.2 209.6, 266.2 208.5 Q 265.3 207.4, 265.3 205.5 M 268.9 208.7 Q 270.2 208.7, 270.8 207.9 Q 271.5 207.1, 271.5 205.5 Q 271.5 203.9, 270.8 203.1 Q 270.2 202.3, 268.9 202.3 Q 267.7 202.3, 267.0 203.1 Q 266.4 203.9, 266.4 205.5 Q 266.4 207.1, 267.0 207.9 Q 267.7 208.7, 268.9 208.7 \" fill=\"#FF0000\"/>\n",
       "<path class=\"atom-31\" d=\"M 273.9 201.5 L 275.0 201.5 L 275.0 204.9 L 279.1 204.9 L 279.1 201.5 L 280.2 201.5 L 280.2 209.5 L 279.1 209.5 L 279.1 205.8 L 275.0 205.8 L 275.0 209.5 L 273.9 209.5 L 273.9 201.5 \" fill=\"#FF0000\"/>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_tmp = df_substrates.copy()\n",
    "\n",
    "csmiles = df_substrates['CSMILES']\n",
    "\n",
    "charges = []\n",
    "\n",
    "expected_mz_H = []\n",
    "expected_mz_Na = []\n",
    "expected_mz_2H = []\n",
    "expected_mz_NH4 = []\n",
    "expected_mz_ACNH = []\n",
    "\n",
    "expected_mz2_H = []\n",
    "expected_mz2_Na = []\n",
    "expected_mz2_2H = []\n",
    "expected_mz2_NH4 = []\n",
    "expected_mz2_ACNH = []\n",
    "\n",
    "for csmi in csmiles:\n",
    "    mol = MolFromSmiles(csmi)\n",
    "    charge = Chem.rdmolops.GetFormalCharge(mol)\n",
    "    mw = ExactMolWt(mol) + 162.0533\n",
    "    mw2 = mw + 162.0533\n",
    "    \n",
    "    if charge == 0:\n",
    "        expected_mz_H.append(mw + 1.007276)\n",
    "        expected_mz_2H.append(mw/2 + 1.007276)\n",
    "        expected_mz_Na.append(mw + 22.989218)\n",
    "        expected_mz_NH4.append(mw + 18.033823)\n",
    "        expected_mz_ACNH.append(mw + 42.033823)\n",
    "        \n",
    "        expected_mz2_H.append(mw2 + 1.007276)\n",
    "        expected_mz2_2H.append(mw2/2 + 1.007276)\n",
    "        expected_mz2_Na.append(mw2 + 22.989218)\n",
    "        expected_mz2_NH4.append(mw2 + 18.033823)\n",
    "        expected_mz2_ACNH.append(mw2 + 42.033823)\n",
    "        \n",
    "           \n",
    "    elif charge > 0:\n",
    "        print(f'Positive charge (+{charge}) in {csmi}')\n",
    "        display(moltosvg(mol))\n",
    "        expected_mz_H.append(mw)\n",
    "        expected_mz_2H.append((mw + 1.007276)/(1. + charge))\n",
    "        expected_mz_Na.append((mw + 22.989218)/(1. + charge))\n",
    "        expected_mz_NH4.append((mw + 18.033823)/(1. + charge))\n",
    "        expected_mz_ACNH.append((mw + 42.033823)/(1. + charge))\n",
    "        \n",
    "        expected_mz2_H.append(mw2)\n",
    "        expected_mz2_2H.append((mw2 + 1.007276)/(1. + charge))\n",
    "        expected_mz2_Na.append((mw2 + 22.989218)/(1. + charge))\n",
    "        expected_mz2_NH4.append((mw2 + 18.033823)/(1. + charge))\n",
    "        expected_mz2_ACNH.append((mw2 + 42.033823)/(1. + charge))\n",
    "        \n",
    "        \n",
    "    else:\n",
    "        print(f'Error: negative charge in {csmi}')\n",
    "        break\n",
    "        \n",
    "    charges.append(charge)\n",
    "\n",
    "df_substrates['M_charge'] = charges\n",
    "df_substrates['M+Glu+H'] = expected_mz_H\n",
    "df_substrates['M+Glu+Na'] = expected_mz_Na\n",
    "df_substrates['M+Glu+2H'] = expected_mz_2H\n",
    "df_substrates['M+Glu+NH4'] = expected_mz_NH4\n",
    "df_substrates['M+ACN+H'] = expected_mz_ACNH\n",
    "\n",
    "df_substrates['M+2Glu+H'] = expected_mz2_H\n",
    "df_substrates['M+2Glu+Na'] = expected_mz2_Na\n",
    "df_substrates['M+2Glu+2H'] = expected_mz2_2H\n",
    "df_substrates['M+2Glu+NH4'] = expected_mz2_NH4\n",
    "df_substrates['M+2Glu+ACN+H'] = expected_mz2_ACNH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "9df3b022",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the 'RT' column. errors='ignore' keeps the cell re-runnable: an in-place\n",
    "# drop raises KeyError once the column is already gone.\n",
    "df_substrates = df_substrates.drop(columns=['RT'], errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00703f2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let us also add the substrate superclass (queried from the NPClassifier web service)\n",
    "\n",
    "import json\n",
    "import urllib.parse\n",
    "import urllib.request\n",
    "\n",
    "NPCLASSIFIER_URL = 'https://npclassifier.ucsd.edu/classify?smiles='\n",
    "RESULT_KEYS = ('class_results', 'superclass_results', 'pathway_results')\n",
    "\n",
    "smiles = list(df_substrates['CSMILES'])\n",
    "class_list = []\n",
    "superclass_list = []\n",
    "pathway_list = []\n",
    "for smi in tqdm(smiles):\n",
    "    try:\n",
    "        # Percent-encode the SMILES: a raw '+', '#' or '%' would corrupt the query string.\n",
    "        query = NPCLASSIFIER_URL + urllib.parse.quote(smi, safe='')\n",
    "        with urllib.request.urlopen(query, timeout=30) as url:\n",
    "            data = json.loads(url.read().decode('utf-8'))\n",
    "        # Build all three labels before appending anything, so a partial failure\n",
    "        # cannot leave the lists with different lengths.\n",
    "        labels = [';'.join(data[key]) for key in RESULT_KEYS]\n",
    "    except Exception:\n",
    "        # Best effort: report the substrate and record it as unclassified.\n",
    "        print(f'SMILES request failed: {smi}')\n",
    "        labels = [np.nan] * len(RESULT_KEYS)\n",
    "    for values, label in zip((class_list, superclass_list, pathway_list), labels):\n",
    "        values.append(label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a1c496a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only the superclass is stored; class_list and pathway_list are not added to the table.\n",
    "df_substrates['superclass'] = superclass_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "20c65844",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# Save the substrate table. The output folder is created first because to_csv\n",
    "# does not create missing directories.\n",
    "out_path = Path('./tmp/Substrates_VB.csv')\n",
    "out_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "df_substrates.to_csv(out_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "013542e6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "project_gt",
   "language": "python",
   "name": "project_gt"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
